<!DOCTYPE html>
<html lang="zh-cn">
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
  <title>11-Selenium+Chrome爬取淘宝 - vzvixb</title>
  <meta name="renderer" content="webkit" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>

<meta http-equiv="Cache-Control" content="no-transform" />
<meta http-equiv="Cache-Control" content="no-siteapp" />

<meta name="theme-color" content="#f8f5ec" />
<meta name="msapplication-navbutton-color" content="#f8f5ec">
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="#f8f5ec">


<meta name="author" content="even" /><meta name="description" content="10.分析Ajax请求并抓取今日头条街拍美图 config.py 1 2 3 4 MONGO_URL = &#39;localhost&#39; MONGO_DB = &#39;taobao&#39; MONGO_TABLE = &#39;product&#39; KEY_WORD = &#39;美食&#39; taobao_spider.py 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19" /><meta name="keywords" content="Hugo, theme, even" />






<meta name="generator" content="Hugo 0.90.1 with theme even" />


<link rel="canonical" href="https://zhouxiaoxin.gitee.io/post/python/11.selenium&#43;chrome%E7%88%AC%E5%8F%96%E6%B7%98%E5%AE%9D/" />
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
<link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png">
<link rel="manifest" href="/manifest.json">
<link rel="mask-icon" href="/safari-pinned-tab.svg" color="#5bbad5">

<script async src="https://busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script>

<link href="/sass/main.min.32d4dc642fec98c34c80bebb9c784c50771712b4a8a25d9f4dd9cce3534b426e.css" rel="stylesheet">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fancyapps/fancybox@3.1.20/dist/jquery.fancybox.min.css" integrity="sha256-7TyXnr2YU040zfSP+rEcz29ggW4j56/ujTPwjMzyqFY=" crossorigin="anonymous">


<meta property="og:title" content="11-分析Ajax请求并抓取今日头条街拍美图" />
<meta property="og:description" content="10.分析Ajax请求并抓取今日头条街拍美图 config.py 1 2 3 4 MONGO_URL = &#39;localhost&#39; MONGO_DB = &#39;taobao&#39; MONGO_TABLE = &#39;product&#39; KEY_WORD = &#39;美食&#39; taobao_spider.py 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19" />
<meta property="og:type" content="article" />
<meta property="og:url" content="https://zhouxiaoxin.gitee.io/post/python/11.selenium&#43;chrome%E7%88%AC%E5%8F%96%E6%B7%98%E5%AE%9D/" /><meta property="article:section" content="post" />
<meta property="article:published_time" content="2018-09-08T16:00:56+08:00" />
<meta property="article:modified_time" content="2018-09-08T16:00:56+08:00" />

<meta itemprop="name" content="11-分析Ajax请求并抓取今日头条街拍美图">
<meta itemprop="description" content="10.分析Ajax请求并抓取今日头条街拍美图 config.py 1 2 3 4 MONGO_URL = &#39;localhost&#39; MONGO_DB = &#39;taobao&#39; MONGO_TABLE = &#39;product&#39; KEY_WORD = &#39;美食&#39; taobao_spider.py 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19"><meta itemprop="datePublished" content="2018-09-08T16:00:56+08:00" />
<meta itemprop="dateModified" content="2018-09-08T16:00:56+08:00" />
<meta itemprop="wordCount" content="691">
<meta itemprop="keywords" content="爬虫,Python," /><meta name="twitter:card" content="summary"/>
<meta name="twitter:title" content="11-分析Ajax请求并抓取今日头条街拍美图"/>
<meta name="twitter:description" content="10.分析Ajax请求并抓取今日头条街拍美图 config.py 1 2 3 4 MONGO_URL = &#39;localhost&#39; MONGO_DB = &#39;taobao&#39; MONGO_TABLE = &#39;product&#39; KEY_WORD = &#39;美食&#39; taobao_spider.py 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19"/>

<!--[if lte IE 9]>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/classlist/1.1.20170427/classList.min.js"></script>
<![endif]-->

<!--[if lt IE 9]>
  <script src="https://cdn.jsdelivr.net/npm/html5shiv@3.7.3/dist/html5shiv.min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/respond.js@1.4.2/dest/respond.min.js"></script>
<![endif]-->

</head>
<body>
  <div id="mobile-navbar" class="mobile-navbar">
  <div class="mobile-header-logo">
    <a href="/" class="logo">Even</a>
  </div>
  <div class="mobile-navbar-icon">
    <span></span>
    <span></span>
    <span></span>
  </div>
</div>
<nav id="mobile-menu" class="mobile-menu slideout-menu">
  <ul class="mobile-menu-list">
    <a href="/">
        <li class="mobile-menu-item">Home</li>
      </a><a href="/post/">
        <li class="mobile-menu-item">Archs</li>
      </a><a href="/tags/">
        <li class="mobile-menu-item">Tags</li>
      </a><a href="/categories/">
        <li class="mobile-menu-item">Cates</li>
      </a><a href="/about/">
        <li class="mobile-menu-item">About</li>
      </a><a href="/pages/runoob/">
        <li class="mobile-menu-item">runoob</li>
      </a><a href="/pages/98wubi/">
        <li class="mobile-menu-item">98wubi</li>
      </a>
  </ul>
</nav>
  <div class="container" id="mobile-panel">
    <header id="header" class="header">
        <div class="logo-wrapper">
  <a href="/" class="logo">Even</a>
</div>

<nav class="site-navbar">
  <ul id="menu" class="menu">
    <li class="menu-item">
        <a class="menu-item-link" href="/">Home</a>
      </li><li class="menu-item">
        <a class="menu-item-link" href="/post/">Archs</a>
      </li><li class="menu-item">
        <a class="menu-item-link" href="/tags/">Tags</a>
      </li><li class="menu-item">
        <a class="menu-item-link" href="/categories/">Cates</a>
      </li><li class="menu-item">
        <a class="menu-item-link" href="/about/">About</a>
      </li><li class="menu-item">
        <a class="menu-item-link" href="/pages/runoob/">runoob</a>
      </li><li class="menu-item">
        <a class="menu-item-link" href="/pages/98wubi/">98wubi</a>
      </li>
  </ul>
</nav>
    </header>

    <main id="main" class="main">
      <div class="content-wrapper">
        <div id="content" class="content">
          <article class="post">
    
    <header class="post-header">
      <h1 class="post-title">11-Selenium+Chrome爬取淘宝</h1>

      <div class="post-meta">
        <span class="post-time"> 2018-09-08 </span>
        <div class="post-category">
            <a href="/categories/python/"> Python </a>
            </div>
          <span class="more-meta"> 约 691 字 </span>
          <span class="more-meta"> 预计阅读 2 分钟 </span>
        <span id="busuanzi_container_page_pv" class="more-meta"> <span id="busuanzi_value_page_pv"><img src="/img/spinner.svg" alt=""/></span> 次阅读 </span>
      </div>
    </header>

    <div class="post-toc" id="post-toc">
  <h2 class="post-toc-title">文章目录</h2>
  <div class="post-toc-content">
    <nav id="TableOfContents">
  <ul>
    <li><a href="#10分析ajax请求并抓取今日头条街拍美图">11.Selenium+Chrome爬取淘宝</a></li>
  </ul>
</nav>
  </div>
</div>
    <div class="post-content">
      <h2 id="10分析ajax请求并抓取今日头条街拍美图">11.Selenium+Chrome爬取淘宝</h2>
<p>config.py</p>
<div class="highlight"><div class="chroma">
<table class="lntable"><tr><td class="lntd">
<pre tabindex="0" class="chroma"><code><span class="lnt">1
</span><span class="lnt">2
</span><span class="lnt">3
</span><span class="lnt">4
</span></code></pre></td>
<td class="lntd">
<pre tabindex="0" class="chroma"><code class="language-py" data-lang="py"><span class="n">MONGO_URL</span> <span class="o">=</span> <span class="s1">&#39;localhost&#39;</span>
<span class="n">MONGO_DB</span> <span class="o">=</span> <span class="s1">&#39;taobao&#39;</span>
<span class="n">MONGO_TABLE</span> <span class="o">=</span> <span class="s1">&#39;product&#39;</span>
<span class="n">KEY_WORD</span> <span class="o">=</span> <span class="s1">&#39;美食&#39;</span>
</code></pre></td></tr></table>
</div>
</div><p>taobao_spider.py</p>
<div class="highlight"><div class="chroma">
<table class="lntable"><tr><td class="lntd">
<pre tabindex="0" class="chroma"><code><span class="lnt">  1
</span><span class="lnt">  2
</span><span class="lnt">  3
</span><span class="lnt">  4
</span><span class="lnt">  5
</span><span class="lnt">  6
</span><span class="lnt">  7
</span><span class="lnt">  8
</span><span class="lnt">  9
</span><span class="lnt"> 10
</span><span class="lnt"> 11
</span><span class="lnt"> 12
</span><span class="lnt"> 13
</span><span class="lnt"> 14
</span><span class="lnt"> 15
</span><span class="lnt"> 16
</span><span class="lnt"> 17
</span><span class="lnt"> 18
</span><span class="lnt"> 19
</span><span class="lnt"> 20
</span><span class="lnt"> 21
</span><span class="lnt"> 22
</span><span class="lnt"> 23
</span><span class="lnt"> 24
</span><span class="lnt"> 25
</span><span class="lnt"> 26
</span><span class="lnt"> 27
</span><span class="lnt"> 28
</span><span class="lnt"> 29
</span><span class="lnt"> 30
</span><span class="lnt"> 31
</span><span class="lnt"> 32
</span><span class="lnt"> 33
</span><span class="lnt"> 34
</span><span class="lnt"> 35
</span><span class="lnt"> 36
</span><span class="lnt"> 37
</span><span class="lnt"> 38
</span><span class="lnt"> 39
</span><span class="lnt"> 40
</span><span class="lnt"> 41
</span><span class="lnt"> 42
</span><span class="lnt"> 43
</span><span class="lnt"> 44
</span><span class="lnt"> 45
</span><span class="lnt"> 46
</span><span class="lnt"> 47
</span><span class="lnt"> 48
</span><span class="lnt"> 49
</span><span class="lnt"> 50
</span><span class="lnt"> 51
</span><span class="lnt"> 52
</span><span class="lnt"> 53
</span><span class="lnt"> 54
</span><span class="lnt"> 55
</span><span class="lnt"> 56
</span><span class="lnt"> 57
</span><span class="lnt"> 58
</span><span class="lnt"> 59
</span><span class="lnt"> 60
</span><span class="lnt"> 61
</span><span class="lnt"> 62
</span><span class="lnt"> 63
</span><span class="lnt"> 64
</span><span class="lnt"> 65
</span><span class="lnt"> 66
</span><span class="lnt"> 67
</span><span class="lnt"> 68
</span><span class="lnt"> 69
</span><span class="lnt"> 70
</span><span class="lnt"> 71
</span><span class="lnt"> 72
</span><span class="lnt"> 73
</span><span class="lnt"> 74
</span><span class="lnt"> 75
</span><span class="lnt"> 76
</span><span class="lnt"> 77
</span><span class="lnt"> 78
</span><span class="lnt"> 79
</span><span class="lnt"> 80
</span><span class="lnt"> 81
</span><span class="lnt"> 82
</span><span class="lnt"> 83
</span><span class="lnt"> 84
</span><span class="lnt"> 85
</span><span class="lnt"> 86
</span><span class="lnt"> 87
</span><span class="lnt"> 88
</span><span class="lnt"> 89
</span><span class="lnt"> 90
</span><span class="lnt"> 91
</span><span class="lnt"> 92
</span><span class="lnt"> 93
</span><span class="lnt"> 94
</span><span class="lnt"> 95
</span><span class="lnt"> 96
</span><span class="lnt"> 97
</span><span class="lnt"> 98
</span><span class="lnt"> 99
</span><span class="lnt">100
</span><span class="lnt">101
</span><span class="lnt">102
</span><span class="lnt">103
</span><span class="lnt">104
</span><span class="lnt">105
</span><span class="lnt">106
</span><span class="lnt">107
</span><span class="lnt">108
</span><span class="lnt">109
</span><span class="lnt">110
</span><span class="lnt">111
</span><span class="lnt">112
</span><span class="lnt">113
</span><span class="lnt">114
</span><span class="lnt">115
</span><span class="lnt">116
</span><span class="lnt">117
</span><span class="lnt">118
</span><span class="lnt">119
</span><span class="lnt">120
</span><span class="lnt">121
</span><span class="lnt">122
</span><span class="lnt">123
</span><span class="lnt">124
</span><span class="lnt">125
</span><span class="lnt">126
</span><span class="lnt">127
</span><span class="lnt">128
</span><span class="lnt">129
</span><span class="lnt">130
</span><span class="lnt">131
</span><span class="lnt">132
</span><span class="lnt">133
</span><span class="lnt">134
</span><span class="lnt">135
</span></code></pre></td>
<td class="lntd">
<pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="kn">from</span> <span class="nn">selenium</span> <span class="kn">import</span> <span class="n">webdriver</span>
<span class="kn">from</span> <span class="nn">selenium.webdriver.common.by</span> <span class="kn">import</span> <span class="n">By</span>
<span class="kn">from</span> <span class="nn">selenium.webdriver.support.ui</span> <span class="kn">import</span> <span class="n">WebDriverWait</span>
<span class="kn">from</span> <span class="nn">selenium.webdriver.support</span> <span class="kn">import</span> <span class="n">expected_conditions</span> <span class="k">as</span> <span class="n">EC</span>
<span class="kn">from</span> <span class="nn">selenium.common.exceptions</span> <span class="kn">import</span> <span class="n">TimeoutException</span>
<span class="kn">from</span> <span class="nn">pyquery</span> <span class="kn">import</span> <span class="n">PyQuery</span> <span class="k">as</span> <span class="n">pq</span>
<span class="kn">from</span> <span class="nn">config</span> <span class="kn">import</span> <span class="o">*</span>
<span class="kn">import</span> <span class="nn">re</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">time</span>
<span class="kn">import</span> <span class="nn">json</span>
<span class="kn">import</span> <span class="nn">pymongo</span>

<span class="n">client</span> <span class="o">=</span> <span class="n">pymongo</span><span class="o">.</span><span class="n">MongoClient</span><span class="p">(</span><span class="n">MONGO_URL</span><span class="p">)</span>
<span class="n">db</span> <span class="o">=</span> <span class="n">client</span><span class="p">[</span><span class="n">MONGO_DB</span><span class="p">]</span>

<span class="k">class</span> <span class="nc">TaobaoSpider</span><span class="p">():</span>

    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">browser</span> <span class="o">=</span> <span class="s1">&#39;&#39;</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">wait</span> <span class="o">=</span> <span class="s1">&#39;&#39;</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">key_world</span> <span class="o">=</span> <span class="n">KEY_WORD</span>

    <span class="k">def</span> <span class="nf">parse_url</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">url</span><span class="p">):</span>  <span class="c1"># 通用解析url</span>
        <span class="k">pass</span>

    <span class="k">def</span> <span class="nf">get_selenium</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>  <span class="c1"># selenium操作</span>
        <span class="k">try</span><span class="p">:</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">browser</span> <span class="o">=</span> <span class="n">browser</span> <span class="o">=</span> <span class="n">webdriver</span><span class="o">.</span><span class="n">Chrome</span><span class="p">()</span>
            <span class="n">browser</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;http://www.taobao.com/&#39;</span><span class="p">)</span>
            <span class="c1"># 设置等待请求成功</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">wait</span> <span class="o">=</span> <span class="n">wait</span> <span class="o">=</span> <span class="n">WebDriverWait</span><span class="p">(</span><span class="n">browser</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span>
            <span class="nb">input</span> <span class="o">=</span> <span class="n">wait</span><span class="o">.</span><span class="n">until</span><span class="p">(</span>
                <span class="n">EC</span><span class="o">.</span><span class="n">presence_of_element_located</span><span class="p">((</span><span class="n">By</span><span class="o">.</span><span class="n">ID</span><span class="p">,</span> <span class="s2">&#34;q&#34;</span><span class="p">))</span>
            <span class="p">)</span>
            <span class="n">submit</span> <span class="o">=</span> <span class="n">wait</span><span class="o">.</span><span class="n">until</span><span class="p">(</span>
                <span class="n">EC</span><span class="o">.</span><span class="n">element_to_be_clickable</span><span class="p">((</span><span class="n">By</span><span class="o">.</span><span class="n">CSS_SELECTOR</span><span class="p">,</span> <span class="s2">&#34;.btn-search&#34;</span><span class="p">))</span>
            <span class="p">)</span>
            <span class="nb">input</span><span class="o">.</span><span class="n">send_keys</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">key_world</span><span class="p">)</span>
            <span class="n">submit</span><span class="o">.</span><span class="n">click</span><span class="p">()</span>
            <span class="n">total</span> <span class="o">=</span> <span class="n">wait</span><span class="o">.</span><span class="n">until</span><span class="p">(</span>
                <span class="n">EC</span><span class="o">.</span><span class="n">presence_of_element_located</span><span class="p">(</span>
                    <span class="p">(</span><span class="n">By</span><span class="o">.</span><span class="n">CSS_SELECTOR</span><span class="p">,</span> <span class="s1">&#39;.m-page .total&#39;</span><span class="p">))</span>
            <span class="p">)</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">get_data_list</span><span class="p">()</span>
            <span class="k">return</span> <span class="n">total</span><span class="o">.</span><span class="n">text</span>
        <span class="k">except</span> <span class="n">TimeoutException</span><span class="p">:</span>
            <span class="nb">print</span><span class="p">(</span><span class="s1">&#39;超时出错！！！&#39;</span><span class="p">)</span>
            <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_selenium</span><span class="p">()</span>

    <span class="k">def</span> <span class="nf">next_page</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">page_num</span><span class="p">):</span>
        <span class="n">wait</span><span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">wait</span>
        <span class="n">borwser</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">browser</span>
        <span class="k">try</span><span class="p">:</span>
            <span class="nb">input</span> <span class="o">=</span> <span class="n">wait</span><span class="o">.</span><span class="n">until</span><span class="p">(</span>
                <span class="n">EC</span><span class="o">.</span><span class="n">presence_of_element_located</span><span class="p">((</span><span class="n">By</span><span class="o">.</span><span class="n">CSS_SELECTOR</span><span class="p">,</span> <span class="s2">&#34;.m-page .input&#34;</span><span class="p">))</span>
            <span class="p">)</span>
            <span class="n">submit</span> <span class="o">=</span> <span class="n">wait</span><span class="o">.</span><span class="n">until</span><span class="p">(</span>
                <span class="n">EC</span><span class="o">.</span><span class="n">element_to_be_clickable</span><span class="p">((</span><span class="n">By</span><span class="o">.</span><span class="n">CSS_SELECTOR</span><span class="p">,</span> <span class="s2">&#34;.m-page .btn&#34;</span><span class="p">))</span>
            <span class="p">)</span>
            <span class="nb">input</span><span class="o">.</span><span class="n">clear</span><span class="p">()</span>
            <span class="nb">input</span><span class="o">.</span><span class="n">send_keys</span><span class="p">(</span><span class="n">page_num</span><span class="p">)</span>
            <span class="n">submit</span><span class="o">.</span><span class="n">click</span><span class="p">()</span>
            <span class="n">wait</span><span class="o">.</span><span class="n">until</span><span class="p">(</span>
                <span class="n">EC</span><span class="o">.</span><span class="n">text_to_be_present_in_element</span><span class="p">((</span><span class="n">By</span><span class="o">.</span><span class="n">CSS_SELECTOR</span><span class="p">,</span><span class="s1">&#39;.m-page li.item.active &gt; span.num&#39;</span><span class="p">),</span> <span class="nb">str</span><span class="p">(</span><span class="n">page_num</span><span class="p">))</span>
            <span class="p">)</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">get_data_list</span><span class="p">()</span>
        <span class="k">except</span> <span class="n">TimeoutException</span><span class="p">:</span>
            <span class="nb">print</span><span class="p">(</span><span class="s1">&#39;获取下一页出错！！！&#39;</span><span class="p">)</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">next_page</span><span class="p">(</span><span class="n">page_num</span><span class="p">)</span>

    <span class="k">def</span> <span class="nf">get_data_list</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>  <span class="c1"># 获取数据</span>
        <span class="n">wait</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">wait</span>
        <span class="n">borwser</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">browser</span>
        <span class="n">wait</span><span class="o">.</span><span class="n">until</span><span class="p">(</span>
            <span class="n">EC</span><span class="o">.</span><span class="n">presence_of_element_located</span><span class="p">((</span><span class="n">By</span><span class="o">.</span><span class="n">CSS_SELECTOR</span><span class="p">,</span> <span class="s1">&#39;#mainsrp-itemlist .items .item&#39;</span><span class="p">))</span>
        <span class="p">)</span>
        <span class="n">html</span> <span class="o">=</span> <span class="n">borwser</span><span class="o">.</span><span class="n">page_source</span>
        <span class="n">html</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&#34;xmlns&#34;</span><span class="p">,</span> <span class="s1">&#39;data-id&#39;</span><span class="p">)</span>
        <span class="n">doc</span> <span class="o">=</span> <span class="n">pq</span><span class="p">(</span><span class="n">html</span><span class="p">)</span>
        <span class="n">doc</span><span class="o">.</span><span class="n">remove_namespaces</span><span class="p">()</span>
        <span class="n">items</span> <span class="o">=</span> <span class="n">doc</span><span class="p">(</span><span class="s1">&#39;#mainsrp-itemlist .items .item&#39;</span><span class="p">)</span>
        <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">items</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
            <span class="n">produce</span> <span class="o">=</span> <span class="p">{</span>
                <span class="s1">&#39;img_src&#39;</span><span class="p">:</span> <span class="s1">&#39;https:&#39;</span><span class="o">+</span><span class="n">item</span><span class="p">(</span><span class="s1">&#39;.pic-box img.img&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">attr</span><span class="p">(</span><span class="s1">&#39;src&#39;</span><span class="p">),</span>
                <span class="s1">&#39;price&#39;</span><span class="p">:</span> <span class="n">item</span><span class="p">(</span><span class="s1">&#39;.price&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">text</span><span class="p">(),</span>
                <span class="s1">&#39;deal&#39;</span><span class="p">:</span> <span class="n">item</span><span class="p">(</span><span class="s1">&#39;.deal-cnt&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">text</span><span class="p">()[:</span><span class="o">-</span><span class="mi">3</span><span class="p">],</span>
                <span class="s1">&#39;title&#39;</span><span class="p">:</span> <span class="n">item</span><span class="p">(</span><span class="s1">&#39;.title&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">text</span><span class="p">(),</span>
                <span class="s1">&#39;shop&#39;</span><span class="p">:</span> <span class="n">item</span><span class="p">(</span><span class="s1">&#39;.shop .shopname .dsrs+span&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">text</span><span class="p">(),</span>
                <span class="s1">&#39;location&#39;</span><span class="p">:</span> <span class="n">item</span><span class="p">(</span><span class="s1">&#39;.location&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">text</span><span class="p">()</span>
            <span class="p">}</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">save_mongodb</span><span class="p">(</span><span class="n">produce</span><span class="p">)</span>

    <span class="k">def</span> <span class="nf">save_local</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>  <span class="c1"># 保存到本地</span>
        <span class="k">pass</span>

    <span class="k">def</span> <span class="nf">save_mongodb</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result</span><span class="p">):</span>  <span class="c1"># 保存在mongodb数据库</span>
        <span class="k">try</span><span class="p">:</span>
            <span class="k">if</span> <span class="n">db</span><span class="p">[</span><span class="n">MONGO_TABLE</span><span class="p">]</span><span class="o">.</span><span class="n">insert</span><span class="p">(</span><span class="n">result</span><span class="p">):</span>
                <span class="nb">print</span><span class="p">(</span><span class="s1">&#39;保存mogodb 成功！！！&#39;</span><span class="p">,</span> <span class="n">result</span><span class="p">)</span>
        <span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span>
            <span class="nb">print</span><span class="p">(</span><span class="s1">&#39;储存到mongodb失败！&#39;</span><span class="p">,</span> <span class="n">result</span><span class="p">)</span>

    <span class="k">def</span> <span class="nf">run</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>  <span class="c1"># 实现主要逻辑</span>
        
        <span class="k">try</span><span class="p">:</span>
            <span class="c1"># 1. 使用selenium访问淘宝输入关键字并点击搜索</span>
            <span class="n">totle</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_selenium</span><span class="p">()</span>
            <span class="n">totle</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="sa">r</span><span class="s1">&#39;\d+&#39;</span><span class="p">,</span> <span class="n">totle</span><span class="p">)</span><span class="o">.</span><span class="n">group</span><span class="p">()</span>
            <span class="nb">print</span><span class="p">(</span><span class="n">totle</span><span class="p">)</span>
            
            <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="nb">int</span><span class="p">(</span><span class="n">totle</span><span class="p">)</span><span class="o">+</span><span class="mi">1</span><span class="p">):</span>
                <span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span>
                <span class="bp">self</span><span class="o">.</span><span class="n">next_page</span><span class="p">(</span><span class="n">i</span><span class="p">)</span>

            <span class="c1"># 2. 解析获取的网页请求数据</span>

            <span class="c1"># 3. 提取有用的数据</span>
            
            
            <span class="c1"># 4. 保存在本地</span>

            <span class="c1"># 5. 保存在mongodb数据库</span>
        <span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span>
            <span class="nb">print</span><span class="p">(</span><span class="s1">&#39;出错了!!&#39;</span><span class="p">)</span>
        <span class="k">finally</span><span class="p">:</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">browser</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>

            


<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
    <span class="n">taobao</span> <span class="o">=</span> <span class="n">TaobaoSpider</span><span class="p">()</span>
    <span class="n">taobao</span><span class="o">.</span><span class="n">run</span><span class="p">()</span>

</code></pre></td></tr></table>
</div>
</div>
    </div>

    <div class="post-copyright">
  <p class="copyright-item">
    <span class="item-title">文章作者</span>
    <span class="item-content">even</span>
  </p>
  <p class="copyright-item">
    <span class="item-title">上次更新</span>
    <span class="item-content">
        2018-09-08
        
    </span>
  </p>
  
  
</div>
<footer class="post-footer">
      <div class="post-tags">
          <a href="/tags/%E7%88%AC%E8%99%AB/">爬虫</a>
          <a href="/tags/python/">Python</a>
          </div>
      <nav class="post-nav">
        <a class="prev" href="/post/python/10.%E5%88%86%E6%9E%90ajax%E8%AF%B7%E6%B1%82%E5%B9%B6%E6%8A%93%E5%8F%96%E4%BB%8A%E6%97%A5%E5%A4%B4%E6%9D%A1%E8%A1%97%E6%8B%8D%E7%BE%8E%E5%9B%BE-/">
            <i class="iconfont icon-left"></i>
            <span class="prev-text nav-default">10-分析Ajax请求并抓取今日头条街拍美图 </span>
            <span class="prev-text nav-mobile">上一篇</span>
          </a>
        <a class="next" href="/post/python/12.scrapy%E6%A1%86%E6%9E%B6%E7%AC%94%E8%AE%B0/">
            <span class="next-text nav-default">12-Scrapy框架</span>
            <span class="next-text nav-mobile">下一篇</span>
            <i class="iconfont icon-right"></i>
          </a>
      </nav>
    </footer>
  </article>
        </div>
        

  

  

      </div>
    </main>

    <footer id="footer" class="footer">
      <div class="social-links">
      <a href="mailto:your@email.com" class="iconfont icon-email" title="email"></a>
      <a href="http://localhost:1313" class="iconfont icon-stack-overflow" title="stack-overflow"></a>
      <a href="http://localhost:1313" class="iconfont icon-twitter" title="twitter"></a>
      <a href="http://localhost:1313" class="iconfont icon-facebook" title="facebook"></a>
      <a href="http://localhost:1313" class="iconfont icon-linkedin" title="linkedin"></a>
      <a href="http://localhost:1313" class="iconfont icon-google" title="google"></a>
      <a href="http://localhost:1313" class="iconfont icon-github" title="github"></a>
      <a href="http://localhost:1313" class="iconfont icon-weibo" title="weibo"></a>
      <a href="http://localhost:1313" class="iconfont icon-zhihu" title="zhihu"></a>
      <a href="http://localhost:1313" class="iconfont icon-douban" title="douban"></a>
      <a href="http://localhost:1313" class="iconfont icon-pocket" title="pocket"></a>
      <a href="http://localhost:1313" class="iconfont icon-tumblr" title="tumblr"></a>
      <a href="http://localhost:1313" class="iconfont icon-instagram" title="instagram"></a>
      <a href="http://localhost:1313" class="iconfont icon-gitlab" title="gitlab"></a>
      <a href="http://localhost:1313" class="iconfont icon-bilibili" title="bilibili"></a>
  <a href="https://zhouxiaoxin.gitee.io/index.xml" type="application/rss+xml" class="iconfont icon-rss" title="rss"></a>
</div>

<div class="copyright">
  <span class="power-by">
    由 <a class="hexo-link" href="https://gohugo.io">Hugo</a> 强力驱动
  </span>
  <span class="division">|</span>
  <span class="theme-info">
    主题 - 
    <a class="theme-link" href="https://github.com/olOwOlo/hugo-theme-even">Even</a>
  </span>

  <div class="busuanzi-footer">
    <span id="busuanzi_container_site_pv"> 本站总访问量 <span id="busuanzi_value_site_pv"><img src="/img/spinner.svg" alt=""/></span> 次 </span>
      <span class="division">|</span>
    <span id="busuanzi_container_site_uv"> 本站总访客数 <span id="busuanzi_value_site_uv"><img src="/img/spinner.svg" alt=""/></span> 人 </span>
  </div>

  <span class="copyright-year">
    &copy; 
    2018 - 
    2022
    <span class="heart">
      <i class="iconfont icon-heart"></i>
    </span>
    <span class="author">even</span>
  </span>
</div>
    </footer>

    <div class="back-to-top" id="back-to-top">
      <i class="iconfont icon-up"></i>
    </div>
  </div>
  
  <script src="https://cdn.jsdelivr.net/npm/jquery@3.2.1/dist/jquery.min.js" integrity="sha256-hwg4gsxgFZhOsEEamdOYGBf13FyQuiTwlAQgxVSNgt4=" crossorigin="anonymous"></script>
  <script src="https://cdn.jsdelivr.net/npm/slideout@1.0.1/dist/slideout.min.js" integrity="sha256-t+zJ/g8/KXIJMjSVQdnibt4dlaDxc9zXr/9oNPeWqdg=" crossorigin="anonymous"></script>
  <script src="https://cdn.jsdelivr.net/npm/@fancyapps/fancybox@3.1.20/dist/jquery.fancybox.min.js" integrity="sha256-XVLffZaxoWfGUEbdzuLi7pwaUJv1cecsQJQqGLe7axY=" crossorigin="anonymous"></script>



<script type="text/javascript" src="/js/main.min.2517c0eb67172a0bae917de4af59b10ca2531411a009d4c0b82f5685259e5771.js"></script>








</body>
</html>
